Import Modules
# Helper libraries
import datetime
from packaging import version
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from collections import Counter
import numpy as np
import pandas as pd
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.datasets import mnist
#from plot_keras_history import plot_history
import math
%matplotlib inline
np.set_printoptions(precision=3, suppress=True)
Load MNIST Dataset
# Fetch the canonical MNIST split: 60k training and 10k test 28x28 grayscale digits.
# (`mnist` is already imported from tensorflow.keras.datasets at the top of the file.)
(x_train, y_train), (x_test, y_test) = mnist.load_data()
EDA Training and Test Sets
# Sanity-check the raw shapes: x_* are (N, 28, 28) image stacks, y_* are (N,) label vectors.
print('x_train:\t{}'.format(x_train.shape))
print('y_train:\t{}'.format(y_train.shape))
print('x_test:\t\t{}'.format(x_test.shape))
print('y_test:\t\t{}'.format(y_test.shape))
Review labels for training set
# Peek at the raw integer labels (0-9) before one-hot encoding.
print("First ten labels training dataset:\n {}\n".format(y_train[0:10]))
Find frequency of each label in training and test sets
# Class frequencies, most common first — confirms the 10 digits are roughly balanced
# in both splits (notebook cell: last expression's value is displayed).
Counter(y_train).most_common()
Counter(y_test).most_common()
Plot sample images with their labels
# Show the first 50 training digits in a 5x10 grid, each titled with its label.
fig = plt.figure(figsize=(15, 9))
for idx in range(50):
    plt.subplot(5, 10, 1 + idx)
    plt.title(y_train[idx])
    plt.xticks([])
    plt.yticks([])
    plt.imshow(x_train[idx].reshape(28, 28), cmap='binary')
Apply one-hot encoding on the labels
# One-hot encode the labels: digit d becomes a 10-vector with a 1 in position d,
# as required by the categorical_crossentropy loss used below.
y_train_encoded = to_categorical(y_train)
y_test_encoded = to_categorical(y_test)
print("First ten entries of y_train:\n {}\n".format(y_train[0:10]))
print("First ten rows of one-hot y_train:\n {}".format(y_train_encoded[0:10,]))
print('y_train_encoded shape: ', y_train_encoded.shape)
print('y_test_encoded shape: ', y_test_encoded.shape)
Reshape the images to 1D arrays
# Before reshape:
print('x_train:\t{}'.format(x_train.shape))
print('x_test:\t\t{}'.format(x_test.shape))
# Flatten each 28x28 image into a 784-element vector. Using len(...) and -1
# instead of the hard-coded (60000, 784)/(10000, 784) makes this work for any
# split size while producing identical results for the standard MNIST split.
x_train_reshaped = np.reshape(x_train, (len(x_train), -1))
x_test_reshaped = np.reshape(x_test, (len(x_test), -1))
# After reshape:
print('x_train_reshaped shape: ', x_train_reshaped.shape)
print('x_test_reshaped shape: ', x_test_reshaped.shape)
# Take a look at the distinct pixel values of the first reshaped training image:
print(set(x_train_reshaped[0]))
np.set_printoptions(linewidth=np.inf)  # print each image row on one line
print("{}".format(x_train[2020]))
Rescale the elements of the reshaped images
# Rescale pixel intensities from [0, 255] ints to [0, 1] float32 for stable training.
x_train_norm = x_train_reshaped.astype('float32') / 255
x_test_norm = x_test_reshaped.astype('float32') / 255
# Take a look at the first reshaped and normalized training image:
print(set(x_train_norm[0]))
EXPERIMENT 1: Our dense neural network will consist of 784 input nodes, a hidden layer with 1 node and 10 output nodes (corresponding to the 10 digits). We use mnist.load_data() to get the 70,000 images divided into a set of 60,000 training images and 10,000 test images. We hold back 5,000 of the 60,000 training images for validation. After training the model, we group the 60,000 activation values of the hidden node for the (original) set of training images by the 10 predicted classes and visualize these sets of values using a boxplot. We expect the overlap between the range of values in the "boxes" to be minimal. In addition, we find the pattern that maximally activates the hidden node as a "warm up" exercise for similar analysis we will perform on CNN models in Assignment 2.
# EXPERIMENT 1: 784 -> 1 -> 10 dense network; the single hidden node's activation
# is analyzed later via boxplots.
model1 = Sequential([
    Dense(input_shape=[784], units=1, activation=tf.nn.relu),
    Dense(name="output_layer", units=10, activation=tf.nn.softmax),
])
model1.summary()
keras.utils.plot_model(model1, "mnist_model.png", show_shapes=True)
model1.compile(optimizer='rmsprop',
               loss='categorical_crossentropy',
               metrics=['accuracy'])
# Hold out 20% of the training data for validation; stop when val accuracy
# stalls for 2 epochs.
history = model1.fit(
    x_train_norm,
    y_train_encoded,
    epochs=200,
    validation_split=0.20,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)],
)
loss1, accuracy1 = model1.evaluate(x_test_norm, y_test_encoded)
print('test set accuracy: ', accuracy1 * 100)
preds1 = model1.predict(x_test_norm)
print('shape of preds: ', preds1.shape)
# First 25 test images with predicted vs. true labels (red caption = misclassified).
plt.figure(figsize=(12, 12))
start_index = 0
for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    pred1 = np.argmax(preds1[start_index + i])
    actual1 = np.argmax(y_test_encoded[start_index + i])
    col = 'g'
    if pred1 != actual1:
        col = 'r'
    plt.xlabel('i={} | pred1={} | true={}'.format(start_index + i, pred1, actual1), color=col)
    plt.imshow(x_test[start_index + i], cmap='binary')
plt.show()
# Class-probability curve for one test image.
index = 17
plt.plot(preds1[index])
plt.show()
history_dict1 = history.history
history_dict1.keys()
losses1 = history.history['loss']
accs1 = history.history['accuracy']
val_losses1 = history.history['val_loss']
val_accs1 = history.history['val_accuracy']
epochs = len(losses1)
print(val_losses1)
print(val_accs1)
# Training vs. validation curves, one subplot for loss and one for accuracy.
plt.figure(figsize=(16, 4))
for i, metrics in enumerate(zip([losses1, accs1], [val_losses1, val_accs1], ['Loss', 'Accuracy'])):
    plt.subplot(1, 2, i + 1)
    plt.plot(range(epochs), metrics[0], label='Training {}'.format(metrics[2]))
    plt.plot(range(epochs), metrics[1], label='Validation {}'.format(metrics[2]))
    plt.legend()
plt.show()
# Get the predicted classes:
pred_classes1 = np.argmax(model1.predict(x_train_norm), axis=-1)
pred_classes1
# FIX: the confusion matrix was computed twice back-to-back; the duplicate
# recompute (identical statements) has been removed.
conf_mx1 = tf.math.confusion_matrix(y_train, pred_classes1)
conf_mx1
print("First ten entries of the predictions:\n {}\n".format(pred_classes1[0:10]))
cm = sns.light_palette((260, 75, 60), input="husl", as_cmap=True)
df1 = pd.DataFrame(preds1[0:20], columns=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
df1.style.format("{:.2%}").background_gradient(cmap=cm)
def plot_confusion_matrix(matrix):
    """Render *matrix* with matshow and a colorbar (if you prefer color)."""
    # NOTE(review): defined but never called in this notebook; the cells below
    # use plt.matshow directly.
    figure = plt.figure(figsize=(8, 8))
    axes = figure.add_subplot(111)
    image = axes.matshow(matrix)
    figure.colorbar(image)
# Blue heat map of model1's training-set confusion matrix (rows = actual, cols = predicted).
plt.figure(figsize=(16,8))
plt.matshow(conf_mx1, cmap=plt.cm.Blues, fignum=1)
plt.xlabel("Predicted Classes")
plt.ylabel("Actual Classes")
plt.show()
def plot_digits(instances, pos, images_per_row=5, **options):
    """Tile flat 784-pixel digit vectors into one image grid and draw it on *pos*.

    *pos* is anything with imshow/axis (an Axes or the pyplot module);
    extra **options are forwarded to imshow.
    """
    side = 28
    per_row = min(len(instances), images_per_row)
    tiles = [inst.reshape(side, side) for inst in instances]
    n_rows = (len(instances) - 1) // per_row + 1
    # Pad the last row with a blank strip so every row concatenates to equal width.
    tiles.append(np.zeros((side, side * (n_rows * per_row - len(instances)))))
    rows = []
    for r in range(n_rows):
        rows.append(np.concatenate(tiles[r * per_row:(r + 1) * per_row], axis=1))
    pos.imshow(np.concatenate(rows, axis=0), cmap='binary', **options)
    pos.axis("off")
# Boxplot of the 10 class-probability columns of df1 (first 20 test predictions).
bplot1 = sns.boxplot(data=df1, width=0.5,palette="colorblind")
EXPERIMENT 2: This time our dense neural network will have 784 input nodes, a hidden layer with 2 nodes and 10 output nodes (corresponding to the 10 digits). For each of the 60,000 images, the outputs of the two hidden nodes are plotted using a scatterplot. We color code the points according to which of the 10 classes the output of the two nodes predicts. Ideally, just like in EXPERIMENT 1, the color clusters should have very little overlap. Also compare the accuracy % & confusion matrix of Experiments 1 & 2. Again, the goal is to get more insights.
Compile the DNN Model
# EXPERIMENT 2: 784 -> 2 -> 10 dense network; the 2-unit bottleneck lets us
# scatter-plot the hidden activations in 2D later.
model2 = Sequential([
    Dense(units=2, activation=tf.nn.relu, input_shape=[784]),
    Dense(units=10, activation=tf.nn.softmax, name="output_layer"),
])
model2.summary()
keras.utils.plot_model(model2, "mnist_model.png", show_shapes=True)
# Categorical cross-entropy over the one-hot labels, RMSprop optimizer.
model2.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
Train the DNN Model
# Train with 20% of the data held out for validation; stop early once validation
# accuracy fails to improve for 2 consecutive epochs.
early_stop2 = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)
history = model2.fit(
    x_train_norm,
    y_train_encoded,
    epochs=200,
    validation_split=0.20,
    callbacks=[early_stop2],
)
Evaluate the DNN Model
# Evaluate generalization on the held-out test set.
loss2, accuracy2 = model2.evaluate(x_test_norm, y_test_encoded)
print('test set accuracy: ', accuracy2 * 100)
Making Predictions
# Per-class probabilities for every test image — shape (10000, 10).
preds2 = model2.predict(x_test_norm)
print('shape of preds: ', preds2.shape)
# First 25 test images annotated with predicted vs. true label (red = wrong).
plt.figure(figsize=(12, 12))
start_index = 0
for offset in range(25):
    idx = start_index + offset
    plt.subplot(5, 5, offset + 1)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    pred2 = np.argmax(preds2[idx])
    actual2 = np.argmax(y_test_encoded[idx])
    col = 'r' if pred2 != actual2 else 'g'
    plt.xlabel('i={} | pred={} | true={}'.format(idx, pred2, actual2), color=col)
    plt.imshow(x_test[idx], cmap='binary')
plt.show()
# Class-probability curve for a single test image.
index = 17
plt.plot(preds2[index])
plt.show()
Reviewing Performance
# FIX: the history_dict2 assignment/keys() pair appeared twice verbatim;
# the duplicate has been removed.
history_dict2 = history.history
history_dict2.keys()
losses2 = history.history['loss']
accs2 = history.history['accuracy']
val_losses2 = history.history['val_loss']
val_accs2 = history.history['val_accuracy']
epochs = len(losses2)
# Training vs. validation curves, one subplot for loss and one for accuracy.
plt.figure(figsize=(16, 4))
for i, metrics in enumerate(zip([losses2, accs2], [val_losses2, val_accs2], ['Loss', 'Accuracy'])):
    plt.subplot(1, 2, i + 1)
    plt.plot(range(epochs), metrics[0], label='Training {}'.format(metrics[2]))
    plt.plot(range(epochs), metrics[1], label='Validation {}'.format(metrics[2]))
    plt.legend()
plt.show()
Create the confusion matrix
# Get the predicted classes:
pred_classes2 = np.argmax(model2.predict(x_train_norm), axis=-1)
pred_classes2
# Training-set confusion matrix for model2 (rows = actual, cols = predicted).
conf_mx = tf.math.confusion_matrix(y_train, pred_classes2)
conf_mx
print("The first prediction\n {}\n".format(pred_classes2[0]))
print("First ten entries of the predictions:\n {}\n".format(pred_classes2[0:10]))
# Light gradient palette for the styled probability table below.
cm = sns.light_palette((260, 75, 60), input="husl", as_cmap=True)
df = pd.DataFrame(preds2[0:20], columns=[str(d) for d in range(10)])
df.style.format("{:.2%}").background_gradient(cmap=cm)
Visualize the confusion matrix
def plot_confusion_matrix(matrix):
    """Render *matrix* with matshow and a colorbar (if you prefer color)."""
    # NOTE(review): re-defined identically in several cells and never called.
    figure = plt.figure(figsize=(8, 8))
    axes = figure.add_subplot(111)
    image = axes.matshow(matrix)
    figure.colorbar(image)
# Blue heat map of model2's training-set confusion matrix.
plt.figure(figsize=(16,8))
plt.matshow(conf_mx, cmap=plt.cm.Blues, fignum=1)
plt.xlabel("Predicted Classes")
plt.ylabel("Actual Classes")
plt.show()
def plot_digits(instances, pos, images_per_row=5, **options):
    """Tile flat 784-pixel digit vectors into one image grid and draw it on *pos*.

    *pos* is anything with imshow/axis (an Axes or the pyplot module);
    extra **options are forwarded to imshow.
    """
    side = 28
    per_row = min(len(instances), images_per_row)
    tiles = [inst.reshape(side, side) for inst in instances]
    n_rows = (len(instances) - 1) // per_row + 1
    # Pad the last row with a blank strip so every row concatenates to equal width.
    tiles.append(np.zeros((side, side * (n_rows * per_row - len(instances)))))
    rows = []
    for r in range(n_rows):
        rows.append(np.concatenate(tiles[r * per_row:(r + 1) * per_row], axis=1))
    pos.imshow(np.concatenate(rows, axis=0), cmap='binary', **options)
    pos.axis("off")
# Error analysis for the easily-confused digit pair 4 vs. 9.
cl_a, cl_b = 4, 9
is_a, is_b = (y_train == cl_a), (y_train == cl_b)
X_aa = x_train_norm[is_a & (pred_classes2 == cl_a)]
X_ab = x_train_norm[is_a & (pred_classes2 == cl_b)]
X_ba = x_train_norm[is_b & (pred_classes2 == cl_a)]
X_bb = x_train_norm[is_b & (pred_classes2 == cl_b)]
# 2x2 panel: diagonal = correct, off-diagonal = the two confusion directions.
plt.figure(figsize=(6, 6))
p1, p2 = plt.subplot(221), plt.subplot(222)
p3, p4 = plt.subplot(223), plt.subplot(224)
plot_digits(X_aa[:25], p1, images_per_row=5)
plot_digits(X_ab[:25], p2, images_per_row=5)
plot_digits(X_ba[:25], p3, images_per_row=5)
plot_digits(X_bb[:25], p4, images_per_row=5)
p1.set_title(f"{cl_a}'s classified as {cl_a}'s")
p2.set_title(f"{cl_a}'s classified as {cl_b}'s")
p3.set_title(f"{cl_b}'s classified as {cl_a}'s")
p4.set_title(f"{cl_b}'s classified as {cl_b}'s")
# plt.savefig("error_analysis_digits_plot_EXP1_valid")
plt.show()
# display 50 of the 4's classified correctly
plt.figure(figsize=(10, 10))
plot_digits(X_aa[:50], plt, images_per_row=10)
# ...and 50 of the 9's classified correctly
plt.figure(figsize=(10, 10))
plot_digits(X_bb[:50], plt, images_per_row=10)
Get the activation values of the hidden nodes
# Build a sibling model that exposes the outputs of model2's 2 layers,
# given the same input.
layer_outputs2 = [lyr.output for lyr in model2.layers]
activation_model2 = models.Model(inputs=model2.input, outputs=layer_outputs2)
print(f"There are {len(layer_outputs2)} layers")
layer_outputs2  # description of the layers
# Activations of every node for each of the 60000 training images.
activations2 = activation_model2.predict(x_train_norm)
hidden_layer_activation2 = activations2[0]
output_layer_activations2 = activations2[1]
hidden_layer_activation2.shape  # one activation per hidden node (2 here) per training image
output_layer_activations2.shape
print(f"The maximum activation value of the hidden nodes in the hidden layer is {hidden_layer_activation2.max()}")
# Some stats about the output layer as an aside...
np.set_printoptions(suppress=True)  # show probabilities as decimals, not scientific notation
output_probs2 = activations2[1]
print(f"The output node has shape {output_probs2.shape}")
print(f"The output for the first image are {output_probs2[0].round(4)}")
print(f"The sum of the probabilities is (approximately) {output_probs2[0].sum()}")
Create a DF with the activation values and class labels
# DataFrame with one column per hidden-node activation plus the true class label.
activation_data2 = {'actual_class': y_train}
activation_data2.update({f"act_val_{k}": hidden_layer_activation2[:, k] for k in range(2)})
activation_df2 = pd.DataFrame(activation_data2)
activation_df2.head()
Visualize with boxplots
# Boxplot of hidden node 0's activation, grouped by true class — overlap between
# boxes indicates classes the single node cannot separate.
bplot = sns.boxplot(y='act_val_0', x='actual_class',
                    data=activation_df2[['act_val_0', 'actual_class']],
                    width=0.5, palette="colorblind")
# displaying the range of activation values for each class label
activation_df2.groupby("actual_class")["act_val_0"].apply(
    lambda s: [round(min(s.tolist()), 2), round(max(s.tolist()), 2)]
).reset_index().rename(columns={"act_val_0": "range_of_act_values"})
Create a dataframe with pixel values and class labels
# DataFrame of pixel values plus the true class label.
# NOTE(review): only the first 128 of the 784 pixels are included — confirm intended.
pixel_data2 = {'actual_class': y_train}
pixel_data2.update({f"pix_val_{k}": x_train_norm[:, k] for k in range(128)})
pixel_df2 = pd.DataFrame(pixel_data2)
pixel_df2.head()
pixel_df2.pix_val_77.value_counts()
pixel_df2.pix_val_78.value_counts()
Scatter plot
# Scatter of two adjacent pixels, colored by true class.
plt.figure(figsize=(8, 8))
hue_palette = sns.color_palette("hls", 10)
sns.scatterplot(x="pix_val_77", y="pix_val_78", hue="actual_class", palette=hue_palette, data=pixel_df2, legend="full")
plt.legend(loc='upper left')
EXPERIMENT 3: Explore architectures with more hidden nodes, then settle on one 'final' model — the best-performing one.
# EXPERIMENT 3: wider network, 784 -> 200 -> 10.
model3 = Sequential([
    Dense(units=200, activation=tf.nn.relu, input_shape=[784]),
    Dense(units=10, activation=tf.nn.softmax, name="output_layer"),
])
model3.summary()
keras.utils.plot_model(model3, "mnist_model.png", show_shapes=True)
model3.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
# Same training regimen as the other experiments: 20% validation split and
# early stopping on validation accuracy.
early_stop3 = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)
history = model3.fit(
    x_train_norm,
    y_train_encoded,
    epochs=200,
    validation_split=0.20,
    callbacks=[early_stop3],
)
# Test-set evaluation, then per-class probabilities for every test image.
loss3, accuracy3 = model3.evaluate(x_test_norm, y_test_encoded)
print('test set accuracy: ', accuracy3 * 100)
preds3 = model3.predict(x_test_norm)
print('shape of preds: ', preds3.shape)
# First 25 test predictions for model3 (red caption = misclassified).
plt.figure(figsize=(12, 12))
start_index = 0
for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    pred3 = np.argmax(preds3[start_index + i])
    actual3 = np.argmax(y_test_encoded[start_index + i])
    col = 'g'
    if pred3 != actual3:
        col = 'r'
    plt.xlabel('i={} | pred={} | true={}'.format(start_index + i, pred3, actual3), color=col)
    plt.imshow(x_test[start_index + i], cmap='binary')
plt.show()
# Class-probability curve for one test image.
index = 17
plt.plot(preds3[index])
plt.show()
# FIX: duplicate history_dict3 assignment/keys() pair removed.
history_dict3 = history.history
history_dict3.keys()
losses3 = history.history['loss']
accs3 = history.history['accuracy']
val_losses3 = history.history['val_loss']
val_accs3 = history.history['val_accuracy']
epochs = len(losses3)
plt.figure(figsize=(16, 4))
for i, metrics in enumerate(zip([losses3, accs3], [val_losses3, val_accs3], ['Loss', 'Accuracy'])):
    plt.subplot(1, 2, i + 1)
    plt.plot(range(epochs), metrics[0], label='Training {}'.format(metrics[2]))
    plt.plot(range(epochs), metrics[1], label='Validation {}'.format(metrics[2]))
    plt.legend()
plt.show()
# Get the predicted classes:
pred_classes3 = np.argmax(model3.predict(x_train_norm), axis=-1)
pred_classes3
conf_mx3 = tf.math.confusion_matrix(y_train, pred_classes3)
conf_mx3
print("The first prediction\n {}\n".format(pred_classes3[0]))
print("First ten entries of the predictions:\n {}\n".format(pred_classes3[0:10]))
cm = sns.light_palette((260, 75, 60), input="husl", as_cmap=True)
df3 = pd.DataFrame(preds3[0:20], columns=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
# BUG FIX: this cell styled `df` (Experiment 2's frame) instead of df3.
df3.style.format("{:.2%}").background_gradient(cmap=cm)
def plot_confusion_matrix(matrix):
    """Render *matrix* with matshow and a colorbar (if you prefer color)."""
    # NOTE(review): re-defined identically in several cells and never called.
    figure = plt.figure(figsize=(8, 8))
    axes = figure.add_subplot(111)
    image = axes.matshow(matrix)
    figure.colorbar(image)
# Blue heat map of model3's training-set confusion matrix.
plt.figure(figsize=(16,8))
plt.matshow(conf_mx3, cmap=plt.cm.Blues, fignum=1)
plt.xlabel("Predicted Classes")
plt.ylabel("Actual Classes")
plt.show()
def plot_digits(instances, pos, images_per_row=5, **options):
    """Tile flat 784-pixel digit vectors into one image grid and draw it on *pos*.

    *pos* is anything with imshow/axis (an Axes or the pyplot module);
    extra **options are forwarded to imshow.
    """
    side = 28
    per_row = min(len(instances), images_per_row)
    tiles = [inst.reshape(side, side) for inst in instances]
    n_rows = (len(instances) - 1) // per_row + 1
    # Pad the last row with a blank strip so every row concatenates to equal width.
    tiles.append(np.zeros((side, side * (n_rows * per_row - len(instances)))))
    rows = []
    for r in range(n_rows):
        rows.append(np.concatenate(tiles[r * per_row:(r + 1) * per_row], axis=1))
    pos.imshow(np.concatenate(rows, axis=0), cmap='binary', **options)
    pos.axis("off")
# Error analysis for the easily-confused digit pair 4 vs. 9 (model3 predictions).
cl_a, cl_b = 4, 9
is_a, is_b = (y_train == cl_a), (y_train == cl_b)
X_aa = x_train_norm[is_a & (pred_classes3 == cl_a)]
X_ab = x_train_norm[is_a & (pred_classes3 == cl_b)]
X_ba = x_train_norm[is_b & (pred_classes3 == cl_a)]
X_bb = x_train_norm[is_b & (pred_classes3 == cl_b)]
plt.figure(figsize=(6, 6))
p1, p2 = plt.subplot(221), plt.subplot(222)
p3, p4 = plt.subplot(223), plt.subplot(224)
plot_digits(X_aa[:25], p1, images_per_row=5)
plot_digits(X_ab[:25], p2, images_per_row=5)
plot_digits(X_ba[:25], p3, images_per_row=5)
plot_digits(X_bb[:25], p4, images_per_row=5)
p1.set_title(f"{cl_a}'s classified as {cl_a}'s")
p2.set_title(f"{cl_a}'s classified as {cl_b}'s")
p3.set_title(f"{cl_b}'s classified as {cl_a}'s")
p4.set_title(f"{cl_b}'s classified as {cl_b}'s")
# plt.savefig("error_analysis_digits_plot_EXP1_valid")
plt.show()
First, display 50 of the 4's classified correctly...
# display 50 of the 4's classified correctly
plt.figure(figsize=(10,10))
plot_digits(X_aa[:50],plt,images_per_row = 10)
Then all the 4's classified as 9's...
# Display every 4 misclassified as a 9.
num = X_ab.shape[0]
plt.figure(figsize=(10,10))
plot_digits(X_ab[:num],plt, images_per_row = 10)
Then display 50 of the 9's classified correctly
# Display 50 of the 9's classified correctly.
plt.figure(figsize=(10,10))
plot_digits(X_bb[:50],plt, images_per_row = 10)
Finally, display all the 9's classified as 4's.
# display all the 9's classified as 4's (the comment previously said 7's/3's,
# but X_ba holds cl_b=9 images predicted as cl_a=4)
num = X_ba.shape[0]
plt.figure(figsize=(10,10))
plot_digits(X_ba[:num],plt, images_per_row = 10)
# Build a sibling model that exposes the outputs of model3's 2 layers.
layer_outputs3 = [lyr.output for lyr in model3.layers]
activation_model3 = models.Model(inputs=model3.input, outputs=layer_outputs3)
print(f"There are {len(layer_outputs3)} layers")
layer_outputs3  # description of the layers
# Activations of every node for each of the 60000 training images.
activations3 = activation_model3.predict(x_train_norm)
hidden_layer_activation3 = activations3[0]
output_layer_activations3 = activations3[1]
hidden_layer_activation3.shape  # one activation per hidden node (200 here) per training image
output_layer_activations3.shape
print(f"The maximum activation value of the hidden nodes in the hidden layer is {hidden_layer_activation3.max()}")
# Some stats about the output layer as an aside...
np.set_printoptions(suppress=True)  # show probabilities as decimals, not scientific notation
output_probs3 = activations3[1]
print(f"The output node has shape {output_probs3.shape}")
print(f"The output for the first image are {output_probs3[0].round(4)}")
print(f"The sum of the probabilities is (approximately) {output_probs3[0].sum()}")
# DataFrame with one column per hidden-node activation (200) plus the true class label.
activation_data3 = {'actual_class': y_train}
activation_data3.update({f"act_val_{k}": hidden_layer_activation3[:, k] for k in range(200)})
activation_df3 = pd.DataFrame(activation_data3)
activation_df3.head()
# How closely does hidden node 0's activation correlate with the class label?
# Seaborn boxplot, grouped by true class.
bplot = sns.boxplot(y='act_val_0', x='actual_class',
                    data=activation_df3[['act_val_0', 'actual_class']],
                    width=0.5, palette="colorblind")
# displaying the range of activation values for each class label
activation_df3.groupby("actual_class")["act_val_0"].apply(
    lambda s: [round(min(s.tolist()), 2), round(max(s.tolist()), 2)]
).reset_index().rename(columns={"act_val_0": "range_of_act_values"})
# DataFrame of pixel values plus the true class label.
# NOTE(review): only the first 154 of the 784 pixels are included — confirm intended.
pixel_data3 = {'actual_class': y_train}
pixel_data3.update({f"pix_val_{k}": x_train_norm[:, k] for k in range(154)})
pixel_df3 = pd.DataFrame(pixel_data3)
pixel_df3.head()
# Scatter of two adjacent pixels, colored by true class.
plt.figure(figsize=(8, 8))
hue_palette = sns.color_palette("hls", 10)
sns.scatterplot(x="pix_val_77", y="pix_val_78", hue="actual_class", palette=hue_palette, data=pixel_df3, legend="full")
plt.legend(loc='upper left')
Experiment 3 - second model
# EXPERIMENT 3, second model: 784 -> 50 -> 10.
model4 = Sequential([
    Dense(units=50, activation=tf.nn.relu, input_shape=[784]),
    Dense(units=10, activation=tf.nn.softmax, name="output_layer"),
])
model4.summary()
keras.utils.plot_model(model4, "mnist_model.png", show_shapes=True)
model4.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
# Same regimen: 20% validation split, early stopping on validation accuracy.
early_stop4 = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)
history = model4.fit(
    x_train_norm,
    y_train_encoded,
    epochs=200,
    validation_split=0.20,
    callbacks=[early_stop4],
)
# Test-set evaluation, then per-class probabilities for every test image.
loss4, accuracy4 = model4.evaluate(x_test_norm, y_test_encoded)
print('test set accuracy: ', accuracy4 * 100)
preds4 = model4.predict(x_test_norm)
print('shape of preds: ', preds4.shape)
# First 25 test predictions for model4 (red caption = misclassified).
plt.figure(figsize=(12, 12))
start_index = 0
for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    pred4 = np.argmax(preds4[start_index + i])
    actual4 = np.argmax(y_test_encoded[start_index + i])
    col = 'g'
    # BUG FIX: the comparison used `actual3` (a stale variable from the model3
    # cell), so colors reflected the wrong ground truth. Compare with actual4.
    if pred4 != actual4:
        col = 'r'
    plt.xlabel('i={} | pred={} | true={}'.format(start_index + i, pred4, actual4), color=col)
    plt.imshow(x_test[start_index + i], cmap='binary')
plt.show()
# Class-probability curve for one test image.
index = 17
plt.plot(preds4[index])
plt.show()
# FIX: duplicate history_dict4 assignment/keys() pair removed.
history_dict4 = history.history
history_dict4.keys()
losses4 = history.history['loss']
accs4 = history.history['accuracy']
val_losses4 = history.history['val_loss']
val_accs4 = history.history['val_accuracy']
epochs = len(losses4)
plt.figure(figsize=(16, 6))
for i, metrics in enumerate(zip([losses4, accs4], [val_losses4, val_accs4], ['Loss', 'Accuracy'])):
    plt.subplot(1, 2, i + 1)
    plt.plot(range(epochs), metrics[0], label='Training {}'.format(metrics[2]))
    plt.plot(range(epochs), metrics[1], label='Validation {}'.format(metrics[2]))
    plt.legend()
plt.show()
Experiment 3 - Third model
# EXPERIMENT 3, third model: 784 -> 150 -> 10.
model5 = Sequential([
    Dense(units=150, activation=tf.nn.relu, input_shape=[784]),
    Dense(units=10, activation=tf.nn.softmax, name="output_layer"),
])
model5.summary()
keras.utils.plot_model(model5, "mnist_model.png", show_shapes=True)
model5.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
# Same regimen: 20% validation split, early stopping on validation accuracy.
early_stop5 = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)
history = model5.fit(
    x_train_norm,
    y_train_encoded,
    epochs=200,
    validation_split=0.20,
    callbacks=[early_stop5],
)
# Test-set evaluation, then per-class probabilities for every test image.
loss5, accuracy5 = model5.evaluate(x_test_norm, y_test_encoded)
print('test set accuracy: ', accuracy5 * 100)
preds5 = model5.predict(x_test_norm)
print('shape of preds: ', preds5.shape)
# First 25 test predictions for model5 (red caption = misclassified).
plt.figure(figsize=(12, 12))
start_index = 0
for offset in range(25):
    idx = start_index + offset
    plt.subplot(5, 5, offset + 1)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    pred5 = np.argmax(preds5[idx])
    actual5 = np.argmax(y_test_encoded[idx])
    col = 'r' if pred5 != actual5 else 'g'
    plt.xlabel('i={} | pred={} | true={}'.format(idx, pred5, actual5), color=col)
    plt.imshow(x_test[idx], cmap='binary')
plt.show()
# Class-probability curve for one test image.
index = 17
plt.plot(preds5[index])
plt.show()
# FIX: duplicate history_dict5 assignment/keys() pair removed.
history_dict5 = history.history
history_dict5.keys()
losses5 = history.history['loss']
accs5 = history.history['accuracy']
val_losses5 = history.history['val_loss']
val_accs5 = history.history['val_accuracy']
epochs = len(losses5)
plt.figure(figsize=(16, 4))
for i, metrics in enumerate(zip([losses5, accs5], [val_losses5, val_accs5], ['Loss', 'Accuracy'])):
    plt.subplot(1, 2, i + 1)
    plt.plot(range(epochs), metrics[0], label='Training {}'.format(metrics[2]))
    plt.plot(range(epochs), metrics[1], label='Validation {}'.format(metrics[2]))
    plt.legend()
plt.show()
# Get the predicted classes:
pred_classes5 = np.argmax(model5.predict(x_train_norm), axis=-1)
pred_classes5
# BUG FIX: the confusion matrix was built from pred_classes3 (model3's
# predictions); it must use model5's own pred_classes5.
conf_mx5 = tf.math.confusion_matrix(y_train, pred_classes5)
conf_mx5
print("The first prediction\n {}\n".format(pred_classes5[0]))
print("First ten entries of the predictions:\n {}\n".format(pred_classes5[0:10]))
cm = sns.light_palette((260, 75, 60), input="husl", as_cmap=True)
df5 = pd.DataFrame(preds5[0:20], columns=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
# BUG FIX: this cell styled the stale `df` from Experiment 2 instead of df5.
df5.style.format("{:.2%}").background_gradient(cmap=cm)
def plot_confusion_matrix(matrix):
    """Render *matrix* with matshow and a colorbar (if you prefer color)."""
    # NOTE(review): re-defined identically in several cells and never called.
    figure = plt.figure(figsize=(8, 8))
    axes = figure.add_subplot(111)
    image = axes.matshow(matrix)
    figure.colorbar(image)
# Blue heat map of model5's training-set confusion matrix.
plt.figure(figsize=(16, 8))
# BUG FIX: this plotted conf_mx3 (model3's matrix) in the model5 section.
plt.matshow(conf_mx5, cmap=plt.cm.Blues, fignum=1)
plt.xlabel("Predicted Classes")
plt.ylabel("Actual Classes")
plt.show()
def plot_digits(instances, pos, images_per_row=5, **options):
    """Tile flat 784-pixel digit vectors into one image grid and draw it on *pos*.

    *pos* is anything with imshow/axis (an Axes or the pyplot module);
    extra **options are forwarded to imshow.
    """
    side = 28
    per_row = min(len(instances), images_per_row)
    tiles = [inst.reshape(side, side) for inst in instances]
    n_rows = (len(instances) - 1) // per_row + 1
    # Pad the last row with a blank strip so every row concatenates to equal width.
    tiles.append(np.zeros((side, side * (n_rows * per_row - len(instances)))))
    rows = []
    for r in range(n_rows):
        rows.append(np.concatenate(tiles[r * per_row:(r + 1) * per_row], axis=1))
    pos.imshow(np.concatenate(rows, axis=0), cmap='binary', **options)
    pos.axis("off")
# Error analysis for the easily-confused digit pair 4 vs. 9 (model5 predictions).
cl_a, cl_b = 4, 9
is_a, is_b = (y_train == cl_a), (y_train == cl_b)
X_aa = x_train_norm[is_a & (pred_classes5 == cl_a)]
X_ab = x_train_norm[is_a & (pred_classes5 == cl_b)]
X_ba = x_train_norm[is_b & (pred_classes5 == cl_a)]
X_bb = x_train_norm[is_b & (pred_classes5 == cl_b)]
plt.figure(figsize=(6, 6))
p1, p2 = plt.subplot(221), plt.subplot(222)
p3, p4 = plt.subplot(223), plt.subplot(224)
plot_digits(X_aa[:25], p1, images_per_row=5)
plot_digits(X_ab[:25], p2, images_per_row=5)
plot_digits(X_ba[:25], p3, images_per_row=5)
plot_digits(X_bb[:25], p4, images_per_row=5)
p1.set_title(f"{cl_a}'s classified as {cl_a}'s")
p2.set_title(f"{cl_a}'s classified as {cl_b}'s")
p3.set_title(f"{cl_b}'s classified as {cl_a}'s")
p4.set_title(f"{cl_b}'s classified as {cl_b}'s")
# plt.savefig("error_analysis_digits_plot_EXP1_valid")
plt.show()
# Build a sibling model that exposes the outputs of model5's 2 layers.
layer_outputs5 = [lyr.output for lyr in model5.layers]
activation_model5 = models.Model(inputs=model5.input, outputs=layer_outputs5)
print(f"There are {len(layer_outputs5)} layers")
layer_outputs5  # description of the layers
# Activations of every node for each of the 60000 training images.
activations5 = activation_model5.predict(x_train_norm)
hidden_layer_activation5 = activations5[0]
output_layer_activations5 = activations5[1]
hidden_layer_activation5.shape  # one activation per hidden node (150 here) per training image
output_layer_activations5.shape
print(f"The maximum activation value of the hidden nodes in the hidden layer is {hidden_layer_activation5.max()}")
# Some stats about the output layer as an aside...
np.set_printoptions(suppress=True)  # show probabilities as decimals, not scientific notation
output_probs5 = activations5[1]
print(f"The output node has shape {output_probs5.shape}")
print(f"The output for the first image are {output_probs5[0].round(4)}")
print(f"The sum of the probabilities is (approximately) {output_probs5[0].sum()}")
# DataFrame with one column per hidden-node activation (150) plus the true class label.
activation_data5 = {'actual_class': y_train}
activation_data5.update({f"act_val_{k}": hidden_layer_activation5[:, k] for k in range(150)})
activation_df5 = pd.DataFrame(activation_data5)
activation_df5.head()
# Boxplot of hidden node 0's activation, grouped by true class.
bplot = sns.boxplot(y='act_val_0', x='actual_class',
                    data=activation_df5[['act_val_0', 'actual_class']],
                    width=0.5, palette="colorblind")
# displaying the range of activation values for each class label
activation_df5.groupby("actual_class")["act_val_0"].apply(
    lambda s: [round(min(s.tolist()), 2), round(max(s.tolist()), 2)]
).reset_index().rename(columns={"act_val_0": "range_of_act_values"})
# Build a dataframe holding the class label plus one column per input pixel.
# FIX: the original hard-coded range(0, 154) and so kept only the first 154 of
# the 784 pixels, contradicting EXPERIMENT 4's stated goal of PCA-reducing the
# full 784-dimensional images down to 154 components. Use every pixel column.
pixel_data5 = {'actual_class': y_train}
for k in range(x_train_norm.shape[1]):
    pixel_data5[f"pix_val_{k}"] = x_train_norm[:, k]
pixel_df5 = pd.DataFrame(pixel_data5)
pixel_df5.head()
# Scatter two raw pixel values against each other, colored by digit class,
# to show how little predictive power individual pixels carry.
plt.figure(figsize=(8, 8))
color = sns.color_palette("hls", 10)
sns.scatterplot(
    data=pixel_df5,
    x="pix_val_77",
    y="pix_val_78",
    hue="actual_class",
    palette=color,
    legend="full",
)
plt.legend(loc='upper left')
Experiment 3 - extra testing model
# Experiment-3 model: one sigmoid hidden layer (150 units) feeding a softmax output.
model6 = Sequential()
model6.add(Dense(units=150, activation=tf.nn.sigmoid, input_shape=[784]))
model6.add(Dense(name="output_layer", units=10, activation=tf.nn.softmax))
model6.summary()
keras.utils.plot_model(model6, "mnist_model.png", show_shapes=True)
# NOTE(review): squared_hinge is an unusual loss for a softmax classifier;
# kept as-is since this experiment deliberately tries a non-standard loss.
model6.compile(
    optimizer='rmsprop',
    loss='squared_hinge',
    metrics=['accuracy'],
)
# Train with a 20% validation split; stop early once val_accuracy
# fails to improve for 2 consecutive epochs.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)
history = model6.fit(
    x_train_norm,
    y_train_encoded,
    epochs=200,
    validation_split=0.20,
    callbacks=[early_stop],
)
# Measure generalization on the held-out test set.
loss6, accuracy6 = model6.evaluate(x_test_norm, y_test_encoded)
print('test set accuracy: ', accuracy6 * 100)
preds6 = model6.predict(x_test_norm)
print('shape of preds: ', preds6.shape)
# Show the first 25 test digits; captions are green when the prediction
# matches the true label and red otherwise.
plt.figure(figsize=(12, 12))
start_index = 0
for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    img_idx = start_index + i
    pred6 = np.argmax(preds6[img_idx])
    actual6 = np.argmax(y_test_encoded[img_idx])
    col = 'g' if pred6 == actual6 else 'r'
    plt.xlabel('i={} | pred={} | true={}'.format(img_idx, pred6, actual6), color=col)
    plt.imshow(x_test[img_idx], cmap='binary')
plt.show()
# Probability distribution over the 10 classes for one test image.
index = 17
plt.plot(preds6[index])
plt.show()
# Plot training vs. validation loss and accuracy per epoch.
# FIX: the original repeated the `history_dict6 = history.history` /
# `.keys()` pair twice (duplicated notebook cell); the duplicate is removed.
history_dict6 = history.history
history_dict6.keys()
losses6 = history_dict6['loss']
accs6 = history_dict6['accuracy']
val_losses6 = history_dict6['val_loss']
val_accs6 = history_dict6['val_accuracy']
epochs = len(losses6)
plt.figure(figsize=(16, 4))
# One subplot per metric: (training series, validation series, label)
for i, (train_vals, val_vals, label) in enumerate(
        [(losses6, val_losses6, 'Loss'), (accs6, val_accs6, 'Accuracy')]):
    plt.subplot(1, 2, i + 1)
    plt.plot(range(epochs), train_vals, label='Training {}'.format(label))
    plt.plot(range(epochs), val_vals, label='Validation {}'.format(label))
    plt.legend()
plt.show()
EXPERIMENT 4: Use PCA decomposition to reduce the number of dimensions of our training set of 28x28-dimensional MNIST images from 784 to 154 (with 95% of the training images' variance lying along these components). We also reduce the input layer of the 'best' model from Experiment 3 to 154 input nodes and train it on the new lower-dimensional data. We then compare the performance of Experiments 3 and 4.
# Project the pixel features onto 154 principal components.
features = list(pixel_data5)[1:]  # all pixel column names (skip 'actual_class')
x = pixel_df5.loc[:, features].values
pca = PCA(n_components=154)
principalComponents = pca.fit_transform(x)
principalDf = pd.DataFrame(data=principalComponents)
# Re-attach the class label to the projected data.
pixel_pca_df = pd.concat([principalDf, pixel_df5[['actual_class']]], axis=1)
pixel_pca_df.head()
pca.explained_variance_ratio_  # fraction of variance captured per component
Use PCA decomposition to reduce activation features from 128 to 2
# Project the hidden-node activations onto 2 principal components.
features = list(activation_data5)[1:]  # activation column names (skip 'actual_class')
x = activation_df5.loc[:, features].values
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(x)
principalDf = pd.DataFrame(
    data=principalComponents,
    columns=['principal component 1', 'principal component 2'],
)
principalDf.head()
# Re-attach the class label to the projected data.
activation_pca_df = pd.concat([principalDf, activation_df5[['actual_class']]], axis=1)
activation_pca_df.head()
pca.explained_variance_ratio_  # variance captured by each of the 2 components
Use scatterplot to visualize predictive power of two principal component values
# Color each projected point by its digit class to see how well
# two principal components separate the classes.
plt.figure(figsize=(16, 10))
sns.scatterplot(
    data=activation_pca_df,
    x="principal component 1",
    y="principal component 2",
    hue="actual_class",
    palette=sns.color_palette("hls", 10),
    legend="full",
    alpha=0.3,
)
Use PCA decomposition to reduce features from 128 to 3
# Same projection as above, but onto 3 principal components.
features = list(activation_data5)[1:]  # ['act_val_0', 'act_val_1', ...]
x = activation_df5.loc[:, features].values
pca = PCA(n_components=3)
principalComponents = pca.fit_transform(x)
principalDf = pd.DataFrame(
    data=principalComponents,
    columns=['pca-one', 'pca-two', 'pca-three'],
)
principalDf.head()
pca.explained_variance_ratio_  # variance captured by each of the 3 components
# Re-attach the class label to the projected data.
activation_pca_df = pd.concat([principalDf, activation_df5[['actual_class']]], axis=1)
activation_pca_df.head()
Use scatter plot to visualize predictive power of 3 principal component values
# Uncomment to be able to rotate the graph...
# %matplotlib notebook
# FIX: Figure.gca(projection='3d') was deprecated in Matplotlib 3.4 and removed
# in 3.6; create the 3-D axes with add_subplot instead.
fig = plt.figure(figsize=(16, 10))
ax = fig.add_subplot(projection='3d')
ax.scatter(
    xs=activation_pca_df.loc[:, "pca-one"],
    ys=activation_pca_df.loc[:, "pca-two"],
    zs=activation_pca_df.loc[:, "pca-three"],
    c=activation_pca_df.loc[:, "actual_class"],  # color by digit class
    cmap='tab10'
)
ax.set_xlabel('pca-one')
ax.set_ylabel('pca-two')
ax.set_zlabel('pca-three')
plt.show()
Use t-Distributed Stochastic Neighbor Embedding (t-SNE) to reduce the (activation) features from 128 (= num of hidden nodes) to 2
# Run t-SNE on the first N training rows only (the full 60k set is slow).
N = 10000
activation_df_subset = activation_df5.iloc[:N].copy()
activation_df_subset.shape
# NOTE(review): relies on `features` still holding the activation column names
# assigned in the PCA cell above — verify if cells are re-run out of order.
data_subset = activation_df_subset[features].values
data_subset.shape
# NOTE(review): `n_iter` is renamed `max_iter` in scikit-learn >= 1.5.
tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
tsne_results = tsne.fit_transform(data_subset)
activation_df_subset['tsne-2d-one'] = tsne_results[:, 0]
activation_df_subset['tsne-2d-two'] = tsne_results[:, 1]
# Scatter the 2-D embedding, colored by digit class.
plt.figure(figsize=(16, 10))
sns.scatterplot(
    data=activation_df_subset,
    x="tsne-2d-one",
    y="tsne-2d-two",
    hue="actual_class",
    palette=sns.color_palette("hls", 10),
    legend="full",
    alpha=0.3,
)
EXPERIMENT 5: We use a Random Forest classifier to get the relative importance of the 784 features (pixels) of the 28x28-dimensional images in the training set of MNIST images and select the top 70 features (pixels). We train our 'best' dense neural network using these 70 features and compare its performance to the dense neural network models from EXPERIMENTS 3 and 4.
Reducing dimensionality with random forest
# Fit a random forest to rank pixel importance for feature selection.
# FIX: fit on the integer labels y_train rather than the one-hot
# y_train_encoded — a 2-D one-hot target makes scikit-learn treat this as a
# multi-output (10 independent binary) problem instead of one 10-class problem.
rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)
rnd_clf.fit(x_train_norm, y_train)
def plot_digit(data):
    """Render a flat 784-element vector as a 28x28 'hot' heat-map, axes hidden."""
    pixels = data.reshape(28, 28)
    plt.imshow(pixels, cmap='hot', interpolation="nearest")
    plt.axis("off")
# Visualize the per-pixel feature importances as an image (brighter = more important).
plot_digit(rnd_clf.feature_importances_)
# colorbar() attaches to the image plot_digit just drew on the current Axes.
cbar = plt.colorbar(ticks=[rnd_clf.feature_importances_.min(), rnd_clf.feature_importances_.max()])
cbar.ax.set_yticklabels(['Not important', 'Very important'])
plt.show()
# Keep only the n most important pixels according to the random forest.
n = 70
imp_arr = rnd_clf.feature_importances_
idx = np.argsort(-imp_arr)[:n]  # indices of the n highest-importance pixels
len(idx)
# Slice the training and test sets down to just those pixel columns.
train_images_sm = x_train_norm[:, idx]
test_images_sm = x_test_norm[:, idx]
train_images_sm.shape, test_images_sm.shape
Visualize 70 pixels
# Convert a flat pixel index n (0 <= n < size*size) into scatter-plot coordinates.
def pair(n, size):
    """Return (x, y) plot coordinates for flat index n of a row-major size x size image.

    In a row-major image, flat index n lies at row n // size, column n % size.
    Matplotlib's scatter() takes x = column and y = row, so return (col, row).
    FIX: the original returned (n // size, n % size) — i.e. (row, col) — which
    transposed the red-dot overlay relative to the displayed digit.
    """
    col = n % size
    row = n // size
    return col, row
# Show one training digit with the selected important-pixel locations marked in red.
plt.imshow(x_train_norm[1].reshape(28, 28), cmap='binary')
coords = np.array([pair(k, 28) for k in idx])
x, y = coords.T
plt.scatter(x, y, color='red', s=20)